import pandas as pd
import plotly.express as px
import numpy as np
# Load the insurance dataset from the working directory into a DataFrame.
df = pd.read_csv('insurance.csv')
# Preview the first rows (head() defaults to 5); the result is only displayed
# when this runs as a notebook cell — as a plain script it is discarded.
df.head()
| | age | sex | bmi | children | smoker | region | charges |
|---|---|---|---|---|---|---|---|
| 0 | 19 | female | 27.900 | 0 | yes | southwest | 16884.92400 |
| 1 | 18 | male | 33.770 | 1 | no | southeast | 1725.55230 |
| 2 | 28 | male | 33.000 | 3 | no | southeast | 4449.46200 |
| 3 | 33 | male | 22.705 | 0 | no | northwest | 21984.47061 |
| 4 | 32 | male | 28.880 | 0 | no | northwest | 3866.85520 |
# Count how many rows fall into each region (most frequent first).
df['region'].value_counts()
southeast    364
southwest    325
northwest    325
northeast    324
Name: region, dtype: int64
# (number of rows, number of columns) of the insurance DataFrame.
df.shape
(1338, 7)
# Load the saved results table; the plot below reads its ActualValue and
# PredictedValue columns.
# NOTE(review): presumably these are model predictions on the insurance data —
# confirm against whatever produced results.csv.
results = pd.read_csv('results.csv')
# Preview the first rows (head() defaults to 5); only displayed in a notebook.
results.head()
| | Age | Sex | Bmi | Children | Smoker | Region | ActualValue | PredictedValue |
|---|---|---|---|---|---|---|---|---|
| 0 | 31.0 | -1.0 | 25.740 | 0.0 | 1.0 | -1.0 | 3756.62160 | 4082.469330 |
| 1 | 25.0 | 1.0 | 26.220 | 0.0 | 1.0 | 0.5 | 2721.32080 | 3014.207023 |
| 2 | 23.0 | 1.0 | 34.400 | 0.0 | 1.0 | -0.5 | 1826.84300 | 4813.924792 |
| 3 | 27.0 | 1.0 | 42.130 | 0.0 | -1.0 | -1.0 | 39611.75770 | 31416.639078 |
| 4 | 60.0 | -1.0 | 36.005 | 0.0 | 1.0 | 0.5 | 13228.84695 | 15080.698229 |
# Scatter of predicted against actual values; a point lying on the y = x
# reference line is a perfect prediction.
fig = px.scatter(results, x='ActualValue', y='PredictedValue')

# Span the reference line over the combined range of BOTH columns. Using only
# the ActualValue range would leave the diagonal short whenever a prediction
# falls below the smallest or above the largest actual value.
lower = min(results.ActualValue.min(), results.PredictedValue.min())
upper = max(results.ActualValue.max(), results.PredictedValue.max())

# Keep a dense sample of points: plotly's default trace mode depends on the
# number of points, and 1000 keeps the trace drawn as a plain line.
x = np.linspace(lower, upper, num=1000)
fig.add_scatter(x=x, y=x, name='y=x')
fig.show()